# -*- coding: utf-8 -*-
"""
Created on Wed May 29 13:30:23 2024

@author: 123
"""
import pandas as pd
import chardet
import numpy as np
import os


# ---------------------------------------------------------------------------
# Time-step label, written as a pandas offset alias.
# Original author's note: resampling averages temperature, pressure, relative
# humidity, precipitation rate and wind speed over the preceding minutes; to
# change the step, switch this value to '10T', '30T' or '1H'.
# In the code visible here the label is only used to build the output folder
# and file name.
# ---------------------------------------------------------------------------
tt = '5T'


# Names of the measurement columns are kept in a separate CSV file.
blb_column = pd.read_csv("J:\\NJ MWR\\Y2022\\blb_column.csv")
blb_column_value = blb_column.values
# Flatten the 2-D table of names into a single 1-D sequence.
blb_column_1d = blb_column_value.reshape(-1)
# The six time fields come first, followed by the measurement columns.
time_column = np.array(['year', 'month', 'day', 'hour', 'minute', 'second'])
column_all = np.concatenate([time_column, blb_column_1d])
# Original author's note: the lines above are not meant to be edited.


# Input folder that holds the .ASC files.
folder_path = "J:\\BJ MWR\\Y2019\\blb\\"

# The year and the data-type label are derived from the folder names instead
# of fixed character offsets, so a path of different length or depth (for
# example one with an extra month folder) does not silently yield wrong values.
_path_parts = [part for part in folder_path.replace('/', '\\').split('\\') if part]

# The year folder is the one named 'Y' followed by four digits, e.g. 'Y2019'.
_year_dirs = [
    part for part in _path_parts
    if len(part) == 5 and part[0] == 'Y' and part[1:].isdigit()
]
if not _year_dirs:
    raise ValueError("no 'Y<yyyy>' folder found in folder_path: " + folder_path)

nameyear = _year_dirs[-1][1:]   # four-digit year, e.g. '2019'
yearls = nameyear[2:]           # two-digit year, e.g. '19'
namelist = _path_parts[-1]      # name of the innermost folder, e.g. 'blb'

def _parse_asc_file(file_path, columns):
    """Parse one .ASC file into a DataFrame with one row per record.

    The file is read as ISO-8859-1 text, blank lines are dropped and every
    remaining line is split on commas. Indices below refer to these
    non-blank lines:

    * the record count is the first whitespace-separated token of the first
      field of line 1;
    * record ``i`` has its time line at ``i*45 + 12`` and its value rows at
      ``i*45 + 14`` up to (not including) ``i*45 + 56``;
    * each time line contributes 8 fields, of which the first 6 are kept;
    * the first column of every value row is dropped (per the original
      author's note it is the row's frequency) and the remaining values are
      flattened row by row.

    Parameters
    ----------
    file_path : str
        Path of the .ASC file.
    columns : sequence
        Column labels for the result: 6 time columns followed by one label
        per flattened value.

    Returns
    -------
    pandas.DataFrame
        One row per record, all values as floats.

    Raises
    ------
    ValueError
        If the file declares fewer than one record or its content does not
        have the expected shape.
    """
    with open(file_path, 'r', encoding='ISO-8859-1') as file:
        content = file.read()

    data = [line.split(',') for line in content.split('\n') if line.strip() != '']
    ls = int(data[1][0].split()[0])
    if ls < 1:
        raise ValueError('no records declared in ' + file_path)

    # Collect everything first and stack once; concatenating inside the loop
    # would re-copy all previous records on every iteration.
    time_fields = []
    value_rows = []
    for i in range(ls):
        base = i * 45
        time_fields.extend(data[base + 12])
        block = np.array(data[base + 14:base + 56]).astype(float)[:, 1:]
        value_rows.append(block.ravel())

    timenp = np.array(time_fields).reshape(ls, 8).astype(float)[:, :6]
    datals = np.column_stack((timenp, np.vstack(value_rows)))
    return pd.DataFrame(datals, columns=columns)


# Names of all .ASC files in the folder, sorted so that the row order of the
# merged table does not depend on the order the file system lists them in.
csv_files = sorted(file for file in os.listdir(folder_path) if file.endswith('.ASC'))
if not csv_files:
    raise FileNotFoundError('No .ASC files found in ' + folder_path)

# Parse every file, then merge all per-file tables in a single concat.
frames = []
for csv_file in csv_files:
    file_path = os.path.join(folder_path, csv_file)
    print('合并'+csv_file)
    frames.append(_parse_asc_file(file_path, column_all))
dataall = pd.concat(frames, ignore_index=True)

print('计算中')

# Remove corrupted records: keep only rows whose year value equals the
# two-digit year taken from the folder name. The comparison is numeric; a
# string-prefix test would also accept damaged values such as 190 or 1999,
# which then make the timestamp parsing below fail. Rows with a missing year
# compare as False and are dropped as well.
expected_year = int(yearls)
dataall = dataall.loc[dataall['year'] == expected_year].copy()

# Build a timestamp from the six time columns ('yy-m-d-H-M-S').
time_parts = dataall[['year', 'month', 'day', 'hour', 'minute', 'second']]
lsls = time_parts.astype(int).astype(str).agg('-'.join, axis=1)
dataall['Timestamp'] = pd.to_datetime(lsls, format='%y-%m-%d-%H-%M-%S')

# Setting Timestamp as the index and resetting it right away moves the column
# to the front and replaces the old row index with a fresh 0..n-1 index.
dataall.set_index('Timestamp', inplace=True)
dataall.reset_index(inplace=True)

# Output goes to J:\BJ MWR\<type>\<step>\<year>_<type>_<step>+.csv
output_dir = 'J:\\BJ MWR\\' + namelist + '\\' + tt
combined_data_path = output_dir + '\\' + nameyear + '_' + namelist + '_' + tt + '+.csv'
# Create the target folder if needed so the run does not fail at the very
# last step after all files have been parsed.
os.makedirs(output_dir, exist_ok=True)
dataall.to_csv(combined_data_path, index=False, encoding='utf-8')



